library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Download the global confirmed-case time series from the CSSEGISandData
# COVID-19 repository and reshape it to long form: every column other than the
# four location columns becomes a (Date, Confirmed) pair.
confirmed_csv_url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
time_series_confirmed_long <- read_csv(url(confirmed_csv_url)) %>%
  # Replace the slash-containing headers with syntactic column names.
  rename(
    Province_State = `Province/State`,
    Country_Region = `Country/Region`
  ) %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Confirmed"
  )
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Download the global deaths time series from the CSSEGISandData COVID-19
# repository and reshape it to long form: every column other than the four
# location columns becomes a (Date, Deaths) pair.
deaths_csv_url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
time_series_deaths_long <- read_csv(url(deaths_csv_url)) %>%
  # Replace the slash-containing headers with syntactic column names.
  rename(
    Province_State = `Province/State`,
    Country_Region = `Country/Region`
  ) %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Deaths"
  )
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Preview the first six rows of the long-format confirmed table.
head(time_series_confirmed_long, n = 6L)
## # A tibble: 6 x 6
## Province_State Country_Region Lat Long Date Confirmed
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 1/22/20 0
## 2 <NA> Afghanistan 33.9 67.7 1/23/20 0
## 3 <NA> Afghanistan 33.9 67.7 1/24/20 0
## 4 <NA> Afghanistan 33.9 67.7 1/25/20 0
## 5 <NA> Afghanistan 33.9 67.7 1/26/20 0
## 6 <NA> Afghanistan 33.9 67.7 1/27/20 0
# Preview the first six rows of the long-format deaths table.
head(time_series_deaths_long, n = 6L)
## # A tibble: 6 x 6
## Province_State Country_Region Lat Long Date Deaths
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 1/22/20 0
## 2 <NA> Afghanistan 33.9 67.7 1/23/20 0
## 3 <NA> Afghanistan 33.9 67.7 1/24/20 0
## 4 <NA> Afghanistan 33.9 67.7 1/25/20 0
## 5 <NA> Afghanistan 33.9 67.7 1/26/20 0
## 6 <NA> Afghanistan 33.9 67.7 1/27/20 0
# Create keys for joining the confirmed and deaths tables
# Add a composite `Key` column ("Province_State.Country_Region.Date") to the
# confirmed table. `remove = FALSE` keeps the three source columns alongside
# the key. A missing Province_State is pasted in as the literal text "NA".
time_series_confirmed_long <- unite(
  time_series_confirmed_long,
  col = Key,
  Province_State, Country_Region, Date,
  sep = ".",
  remove = FALSE
)
head(time_series_confirmed_long, n = 6L)
## # A tibble: 6 x 7
## Key Province_State Country_Region Lat Long Date Confirmed
## <chr> <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 NA.Afghanistan.1/2… <NA> Afghanistan 33.9 67.7 1/22/… 0
## 2 NA.Afghanistan.1/2… <NA> Afghanistan 33.9 67.7 1/23/… 0
## 3 NA.Afghanistan.1/2… <NA> Afghanistan 33.9 67.7 1/24/… 0
## 4 NA.Afghanistan.1/2… <NA> Afghanistan 33.9 67.7 1/25/… 0
## 5 NA.Afghanistan.1/2… <NA> Afghanistan 33.9 67.7 1/26/… 0
## 6 NA.Afghanistan.1/2… <NA> Afghanistan 33.9 67.7 1/27/… 0
# Build the same composite `Key` for the deaths table. Here the source columns
# are consumed by `unite()` (default `remove = TRUE`) ...
time_series_deaths_long <- unite(
  time_series_deaths_long,
  col = Key,
  Province_State, Country_Region, Date,
  sep = "."
)
# ... and only the key and the death count are kept, so the join below does not
# bring in duplicate location columns.
time_series_deaths_long <- select(time_series_deaths_long, Key, Deaths)
head(time_series_deaths_long, n = 6L)
## # A tibble: 6 x 2
## Key Deaths
## <chr> <dbl>
## 1 NA.Afghanistan.1/22/20 0
## 2 NA.Afghanistan.1/23/20 0
## 3 NA.Afghanistan.1/24/20 0
## 4 NA.Afghanistan.1/25/20 0
## 5 NA.Afghanistan.1/26/20 0
## 6 NA.Afghanistan.1/27/20 0
# Join tables
# Combine confirmed and death counts row-by-row on the composite key, then drop
# the key because it is no longer needed once the tables are merged.
time_series_long_joined <- time_series_confirmed_long %>%
  full_join(time_series_deaths_long, by = "Key") %>%
  select(-Key)
head(time_series_long_joined, n = 6L)
## # A tibble: 6 x 7
## Province_State Country_Region Lat Long Date Confirmed Deaths
## <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 1/22/20 0 0
## 2 <NA> Afghanistan 33.9 67.7 1/23/20 0 0
## 3 <NA> Afghanistan 33.9 67.7 1/24/20 0 0
## 4 <NA> Afghanistan 33.9 67.7 1/25/20 0 0
## 5 <NA> Afghanistan 33.9 67.7 1/26/20 0 0
## 6 <NA> Afghanistan 33.9 67.7 1/27/20 0 0
# Create report table with counts
# Parse the "m/d/yy" date strings (e.g. "1/22/20") into Date values before any
# further use. Left as character, the dates sort lexically rather than
# chronologically, which breaks the time-series plots and the animation below.
# The is.character() guard makes this step safe to re-run after conversion.
if (is.character(time_series_long_joined$Date)) {
  time_series_long_joined <- time_series_long_joined %>%
    mutate(Date = mdy(Date))
}

# Stack the Confirmed and Deaths columns into a single Counts column, labelled
# by Report_Type, so both report types can be handled as one variable.
time_series_long_joined_counts <- time_series_long_joined %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long, Date),
    names_to = "Report_Type",
    values_to = "Counts"
  )
head(time_series_long_joined_counts)
## # A tibble: 6 x 7
## Province_State Country_Region Lat Long Date Report_Type Counts
## <chr> <chr> <dbl> <dbl> <date> <chr> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 2020-01-22 Confirmed 0
## 2 <NA> Afghanistan 33.9 67.7 2020-01-22 Deaths 0
## 3 <NA> Afghanistan 33.9 67.7 2020-01-23 Confirmed 0
## 4 <NA> Afghanistan 33.9 67.7 2020-01-23 Deaths 0
## 5 <NA> Afghanistan 33.9 67.7 2020-01-24 Confirmed 0
## 6 <NA> Afghanistan 33.9 67.7 2020-01-24 Deaths 0
# Export the US deaths time series as a 6 x 3 inch PDF.

# pdf() cannot create missing directories, so make sure the target exists.
dir.create("images", showWarnings = FALSE, recursive = TRUE)

# Aggregate province-level rows into one US total per date. Filtering first
# avoids summarising every other country only to discard the result.
us_deaths_plot <- time_series_long_joined %>%
  filter(Country_Region == "US") %>%
  group_by(Country_Region, Date) %>%
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
  ggplot(aes(x = Date, y = Deaths, color = Deaths)) +
  geom_point() +
  geom_line() +
  ggtitle("US COVID-19 Deaths")

pdf("images/time_series_example_plot.pdf", width = 6, height = 3)
# ggplot objects are only drawn when printed; print explicitly so the file is
# not left empty when the script is run via source().
print(us_deaths_plot)
dev.off()
## quartz_off_screen
## 2
# Export the US deaths time series as a 6 x 6 inch PNG at `ppi` pixels per inch.
ppi <- 300

# png() cannot create missing directories, so make sure the target exists.
dir.create("images", showWarnings = FALSE, recursive = TRUE)

# Aggregate province-level rows into one US total per date. Filtering first
# avoids summarising every other country only to discard the result.
us_deaths_plot <- time_series_long_joined %>%
  filter(Country_Region == "US") %>%
  group_by(Country_Region, Date) %>%
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
  ggplot(aes(x = Date, y = Deaths, color = Deaths)) +
  geom_point() +
  geom_line() +
  ggtitle("US COVID-19 Deaths")

png(
  "images/time_series_example_plot.png",
  width = 6 * ppi, height = 6 * ppi, res = ppi
)
# ggplot objects are only drawn when printed; print explicitly so the file is
# not left empty when the script is run via source().
print(us_deaths_plot)
dev.off()
## quartz_off_screen
## 2
# Preview the first six rows of the joined confirmed/deaths table.
head(time_series_long_joined, n = 6L)
## # A tibble: 6 x 7
## Province_State Country_Region Lat Long Date Confirmed Deaths
## <chr> <chr> <dbl> <dbl> <date> <dbl> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 2020-01-22 0 0
## 2 <NA> Afghanistan 33.9 67.7 2020-01-23 0 0
## 3 <NA> Afghanistan 33.9 67.7 2020-01-24 0 0
## 4 <NA> Afghanistan 33.9 67.7 2020-01-25 0 0
## 5 <NA> Afghanistan 33.9 67.7 2020-01-26 0 0
## 6 <NA> Afghanistan 33.9 67.7 2020-01-27 0 0
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Interactive version of the US deaths plot.
# Step 1: total Confirmed and Deaths per country and date, keeping the US rows.
us_daily_totals <- time_series_long_joined %>%
  group_by(Country_Region, Date) %>%
  summarise_at(vars(Confirmed, Deaths), sum) %>%
  filter(Country_Region == "US")

# Step 2: build the static ggplot.
us_deaths_static <- ggplot(us_daily_totals, aes(x = Date, y = Deaths)) +
  geom_point() +
  geom_line() +
  ggtitle("US COVID-19 Deaths")

# Step 3: convert it to a plotly widget.
ggplotly(us_deaths_static)
library(gganimate)
library(transformr)
theme_set(theme_bw())
library(gifski)
library(av)
# Animated comparison of confirmed cases for a handful of countries.
focus_countries <- c("China","Korea, South","Japan","Italy","US")

# Per-country, per-date totals restricted to the countries of interest.
data_time <- time_series_long_joined %>%
  group_by(Country_Region, Date) %>%
  summarise_at(vars(Confirmed, Deaths), sum) %>%
  filter(Country_Region %in% focus_countries)

# Layer order is kept exactly as written: an ungrouped point layer, the line,
# then a point layer in which every row is its own group, and finally the
# reveal transition along Date.
p <- data_time %>%
  ggplot(aes(x = Date, y = Confirmed, color = Country_Region)) +
  geom_point() +
  geom_line() +
  ggtitle("Confirmed COVID-19 Cases") +
  geom_point(aes(group = seq_along(Date))) +
  transition_reveal(Date)

# Render to GIF via gifski; end_pause = 15 is passed through to animate().
animate(p, renderer = gifski_renderer(), end_pause = 15)
